In [1]:
from sys import path
path.append("../../")
import pandas as pd
import seaborn as sns
from matplotlib import pyplot
from utils.data_cube_utilities import dc_display_map
%matplotlib inline
In [2]:
# Load the input table from the parent directory and preview its first rows.
df = pd.read_csv(filepath_or_buffer='../data.csv')
df.head(n=5)
Out[2]:
In [3]:
# Number of rows for each distinct LandUse value in the raw table.
df.groupby(by="LandUse").size()
Out[3]:
In [4]:
# Bar chart of row counts per LandUse value, drawn on an explicitly created
# wide axes (the trailing semicolon hides the Axes repr).
fig, ax = pyplot.subplots(figsize=(15, 3))
sns.countplot(data=df, x="LandUse", palette="Greens_d", ax=ax);
In [5]:
def _coarsen_land_use(label):
    """Map a detailed LandUse label onto one of five coarse classes.

    "Forestry", "Fruittrees" and "Nativeforest" collapse to "Forest";
    "Forest", "Prairie", "Summercrops" and "Naturalgrassland" are kept
    unchanged; every other value (including a missing one) becomes "Misc".
    """
    if label in ("Forestry", "Fruittrees", "Nativeforest"):
        return "Forest"
    if label in ("Forest", "Prairie", "Summercrops", "Naturalgrassland"):
        return label
    return "Misc"


# Work on a copy so the raw frame shown by the earlier cells is left untouched.
df_new = df.copy()
# Assign the mapped column back to the frame. Calling `.update()` on
# `df_new['LandUse']` mutates a temporary Series obtained through chained
# indexing, which does not propagate to `df_new` under pandas Copy-on-Write.
df_new["LandUse"] = df_new["LandUse"].map(_coarsen_land_use)
In [6]:
# Number of rows for each LandUse value after relabeling.
df_new.groupby(by="LandUse").size()
Out[6]:
In [7]:
# Same count chart as before, now for the relabeled table, on an explicitly
# created axes (the trailing semicolon hides the Axes repr).
fig, ax = pyplot.subplots(figsize=(15, 5))
sns.countplot(data=df_new, x="LandUse", palette="Greens_d", ax=ax);
In [8]:
# Hand the relabeled rows to the project's map helper, grouped by LandUse.
# NOTE(review): pin-on-map rendering is assumed from the helper's name — confirm.
dc_display_map.display_grouped_pandas_rows_as_pins(
    df_new,
    group_name="LandUse",
)
Out[8]:
In [9]:
# Relative path that the relabeled table is written to by the later to_csv call.
output_destination_name = "./relabeled_data.csv"
In [10]:
# Preview the first rows of the relabeled table before it is written out.
df_new.head(n=5)
Out[10]:
In [11]:
# Write the relabeled table to CSV. The frame's index is written as the first
# column because `index` is left at its pandas default.
df_new.to_csv(path_or_buf=output_destination_name)
In [12]:
# List the current directory (presumably to check that the CSV was created).
!ls